This document demonstrates the use of the bRF and LASSO-D3S functions for integrative GRN inference.
These functions infer the regulatory pathways of Arabidopsis thaliana roots in response to nitrate (N) induction, using the data from Varala et al., 2018.
They take as inputs the expression profiles of N-responsive genes and TFBS information. The prior TFBS information was built by searching the promoters of the N-responsive genes for the PWMs of the N-responsive regulators.
Import of the expression data and the N-responsive genes and regulators :
# Load the inference inputs; the file is expected to provide `input_data`,
# which is read on the following lines.
load('rdata/inference_input_N_response_varala.rdata')
# Target genes.
genes <- input_data$grouped_genes
length(genes)
## [1] 1426
# Candidate regulators.
tfs <- input_data$grouped_regressors
length(tfs)
## [1] 201
# Expression matrix.
counts <- input_data$counts
dim(counts)
## [1] 1426 45
# Load the PWM occurrences; the file is expected to provide `pwm_occurrence`.
load("rdata/pwm_occurrences_N_response_varala.rdata")
dim(pwm_occurrence)
## [1] 1426 201
Without setting any seed, the MSE profiles are quite similar.
# Grid of alpha values passed to the inference functions (0 to 1, step 0.1).
ALPHAS <- seq(0, 1, by = 0.1)
# Random subset of 20 genes drawn without replacement. No seed is set here, so
# the subset changes from one run to the next.
subset <- sample(genes, replace = FALSE, size = 20)
# One row per sampled gene; a column named after each alpha is added below.
lmses <- data.frame(row.names = subset)
lmses_perm <- data.frame(row.names = subset)
for (alpha in ALPHAS) {
  # set.seed(121314)
  lmses[, as.character(alpha)] <- bRF_inference_MSE(
    counts, subset, tfs,
    alpha = alpha, nTrees = 4000,
    pwm_occurrence = pwm_occurrence, nCores = 15
  )
  # set.seed(121314)
  # NOTE(review): `lmses_perm` is filled with tf_expression_permutation = FALSE
  # here, whereas the seeded runs later in this document use TRUE for
  # `lmses_perm` -- confirm which comparison is intended.
  lmses_perm[, as.character(alpha)] <- bRF_inference_MSE(
    counts, subset, tfs,
    alpha = alpha, nTrees = 4000,
    pwm_occurrence = pwm_occurrence, nCores = 15,
    tf_expression_permutation = FALSE
  )
}
## 2.972 sec elapsed
## 2.92 sec elapsed
## 2.688 sec elapsed
## 2.953 sec elapsed
## 2.78 sec elapsed
## 3.008 sec elapsed
## 3.528 sec elapsed
## 3.652 sec elapsed
## 3.76 sec elapsed
## 3.721 sec elapsed
## 3.758 sec elapsed
## 3.716 sec elapsed
## 3.802 sec elapsed
## 3.746 sec elapsed
## 3.551 sec elapsed
## 3.715 sec elapsed
## 3.634 sec elapsed
## 3.862 sec elapsed
## 3.623 sec elapsed
## 3.417 sec elapsed
## 3.365 sec elapsed
## 3.553 sec elapsed
# Plot the MSE of one gene against alpha, one line per run.
#
# gene: row name looked up in the global data frames `lmses` ("mse1") and
#       `lmses_perm` ("mse2"); alpha values come from the global `ALPHAS`.
# Returns the ggplot object; the caller is responsible for printing it.
draw_gene <- function(gene) {
  mse_wide <- data.frame(
    "mse1" = as.numeric(lmses[gene, ]),
    "mse2" = as.numeric(lmses_perm[gene, ]),
    alpha = ALPHAS
  )
  # Long format: one row per (alpha, run) pair, the run label in `permutation`.
  mse_long <- gather(mse_wide, mse1, mse2, key = "permutation", value = "mse")
  ggplot(
    mse_long,
    aes(x = alpha, y = mse, group = permutation, color = permutation)
  ) +
    geom_line()
}
# Print one MSE-vs-alpha plot per sampled gene for the results computed so far.
for (gene in subset) {
  print(draw_gene(gene))
}
# Reset the result tables, then repeat the unseeded experiment with LASSO-D3S.
lmses <- data.frame(row.names = subset)
lmses_perm <- data.frame(row.names = subset)
for (alpha in ALPHAS) {
  # set.seed(121314)
  lmses[, as.character(alpha)] <- LASSO.D3S_inference_MSE(
    counts, subset, tfs,
    alpha = alpha, N = 200,
    pwm_occurrence = pwm_occurrence, nCores = 15
  )
  # NOTE(review): `lmses_perm` is filled with tf_expression_permutation = FALSE
  # here, whereas the seeded runs later in this document use TRUE for
  # `lmses_perm` -- confirm which comparison is intended.
  lmses_perm[, as.character(alpha)] <- LASSO.D3S_inference_MSE(
    counts, subset, tfs,
    alpha = alpha, N = 200,
    pwm_occurrence = pwm_occurrence, nCores = 15,
    tf_expression_permutation = FALSE
  )
}
## 4.13 sec elapsed
## 4.007 sec elapsed
## 3.431 sec elapsed
## 3.516 sec elapsed
## 3.317 sec elapsed
## 3.696 sec elapsed
## 3.606 sec elapsed
## 3.161 sec elapsed
## 3.183 sec elapsed
## 3.305 sec elapsed
## 3.75 sec elapsed
## 2.891 sec elapsed
## 2.764 sec elapsed
## 2.959 sec elapsed
## 2.829 sec elapsed
## 2.639 sec elapsed
## 2.68 sec elapsed
## 2.807 sec elapsed
## 2.517 sec elapsed
## 2.675 sec elapsed
## 2.864 sec elapsed
## 2.595 sec elapsed
# Print one MSE-vs-alpha plot per sampled gene for the results computed so far.
for (gene in subset) {
  print(draw_gene(gene))
}
# Reset the result tables, then run bRF with an explicit seed:
# - `lmses` holds one run per alpha without TF expression permutation;
# - `lmses_perm` holds 10 runs per alpha with permutation, stored in columns
#   named "<alpha> <perm>" (space-separated).
lmses <- data.frame(row.names = subset)
lmses_perm <- data.frame(row.names = subset)
for (alpha in ALPHAS) {
  lmses[, as.character(alpha)] <- bRF_inference_MSE(
    counts, subset, tfs,
    alpha = alpha, nTrees = 4000, seed = 121314,
    pwm_occurrence = pwm_occurrence, nCores = 15,
    tf_expression_permutation = FALSE
  )
  for (perm in 1:10) {
    # NOTE(review): every replicate receives the same seed; if `seed` also
    # drives the permutation, the 10 replicates will be identical -- confirm.
    lmses_perm[, paste(as.character(alpha), perm)] <- bRF_inference_MSE(
      counts, subset, tfs,
      alpha = alpha, nTrees = 4000,
      pwm_occurrence = pwm_occurrence, nCores = 15,
      tf_expression_permutation = TRUE, seed = 121314
    )
  }
}
## 2.986 sec elapsed
## 2.88 sec elapsed
## 2.803 sec elapsed
## 2.901 sec elapsed
## 2.744 sec elapsed
## 2.897 sec elapsed
## 2.707 sec elapsed
## 2.714 sec elapsed
## 2.724 sec elapsed
## 2.772 sec elapsed
## 2.746 sec elapsed
## 2.788 sec elapsed
## 2.793 sec elapsed
## 2.757 sec elapsed
## 2.926 sec elapsed
## 2.795 sec elapsed
## 2.698 sec elapsed
## 2.73 sec elapsed
## 2.689 sec elapsed
## 2.888 sec elapsed
## 2.656 sec elapsed
## 2.805 sec elapsed
## 2.61 sec elapsed
## 2.708 sec elapsed
## 2.665 sec elapsed
## 2.746 sec elapsed
## 2.568 sec elapsed
## 2.676 sec elapsed
## 2.681 sec elapsed
## 2.726 sec elapsed
## 2.771 sec elapsed
## 2.794 sec elapsed
## 2.674 sec elapsed
## 2.855 sec elapsed
## 2.873 sec elapsed
## 2.692 sec elapsed
## 2.808 sec elapsed
## 2.729 sec elapsed
## 2.909 sec elapsed
## 2.758 sec elapsed
## 3.1 sec elapsed
## 3.23 sec elapsed
## 2.853 sec elapsed
## 2.737 sec elapsed
## 3.242 sec elapsed
## 3.202 sec elapsed
## 3.258 sec elapsed
## 3.117 sec elapsed
## 3.216 sec elapsed
## 3.045 sec elapsed
## 3.059 sec elapsed
## 2.735 sec elapsed
## 3.163 sec elapsed
## 3.101 sec elapsed
## 3.165 sec elapsed
## 3.133 sec elapsed
## 3.125 sec elapsed
## 3.053 sec elapsed
## 2.74 sec elapsed
## 3.081 sec elapsed
## 3.215 sec elapsed
## 3.027 sec elapsed
## 3.128 sec elapsed
## 3.123 sec elapsed
## 3.181 sec elapsed
## 3.048 sec elapsed
## 2.955 sec elapsed
## 3.178 sec elapsed
## 3.102 sec elapsed
## 3.152 sec elapsed
## 3.164 sec elapsed
## 3.039 sec elapsed
## 3.139 sec elapsed
## 3.084 sec elapsed
## 3.05 sec elapsed
## 2.949 sec elapsed
## 3.171 sec elapsed
## 3.118 sec elapsed
## 3.65 sec elapsed
## 3.33 sec elapsed
## 3.286 sec elapsed
## 3.007 sec elapsed
## 3.152 sec elapsed
## 3.493 sec elapsed
## 3.646 sec elapsed
## 3.602 sec elapsed
## 3.539 sec elapsed
## 3.583 sec elapsed
## 3.759 sec elapsed
## 3.878 sec elapsed
## 3.607 sec elapsed
## 3.658 sec elapsed
## 3.543 sec elapsed
## 3.546 sec elapsed
## 3.74 sec elapsed
## 2.801 sec elapsed
## 2.727 sec elapsed
## 2.618 sec elapsed
## 2.851 sec elapsed
## 2.812 sec elapsed
## 2.763 sec elapsed
## 2.629 sec elapsed
## 2.79 sec elapsed
## 2.715 sec elapsed
## 2.706 sec elapsed
## 2.771 sec elapsed
## 2.909 sec elapsed
## 2.715 sec elapsed
## 2.707 sec elapsed
## 2.729 sec elapsed
## 2.813 sec elapsed
## 2.873 sec elapsed
## 2.682 sec elapsed
## 2.804 sec elapsed
## 2.567 sec elapsed
## 2.786 sec elapsed
## 2.651 sec elapsed
## 2.822 sec elapsed
## 2.587 sec elapsed
## 2.651 sec elapsed
## 2.819 sec elapsed
# Plot the MSE of one gene against alpha: one line for the non-permuted run and
# one line per permuted replicate.
#
# gene: row name looked up in the global data frames `lmses` (columns named
#       "<alpha>") and `lmses_perm` (columns named "<alpha> <rep>").
# Returns the ggplot object; the caller is responsible for printing it.
draw_gene <- function(gene) {
  cbind.data.frame(lmses[gene, ], lmses_perm[gene, ]) %>%
    gather() %>%
    # Columns coming from `lmses` carry no replicate suffix; that is expected,
    # so fill `rep` with NA on the right instead of warning on every call.
    separate(key, into = c("alpha", "rep"), sep = " ", fill = "right") %>%
    mutate(
      # `separate()` yields character alphas; convert them so the x axis is
      # continuous, as in the earlier alpha-vs-MSE plots.
      alpha = as.numeric(alpha),
      permutated = !is.na(rep)
    ) %>%
    ggplot(aes(x = alpha, y = value, group = rep, color = permutated)) +
    geom_line()
}
# draw_gene <- function(gene){
#
# cbind.data.frame(lmses[gene,], lmses_perm[gene,]) %>%
# gather() %>%
# separate(key, into = c("alpha", "rep"), sep = " ") %>%
# mutate(permutated = !is.na(rep))%>%
# group_by(alpha, permutated) %>%
# summarise(mean_mse = mean(value, na.rm=T),
# std_mse = sd(value, na.rm=T))%>%
# ggplot(aes(x=alpha, y=mean_mse,
# color = permutated))+
# geom_point()
# }
# Print one MSE-vs-alpha plot per sampled gene for the results computed so far.
for (gene in subset) {
  print(draw_gene(gene))
}
# Reset the result tables, then run LASSO-D3S with an explicit seed (passed
# through the `seed` argument rather than set.seed()):
# - `lmses` holds one run per alpha without TF expression permutation;
# - `lmses_perm` holds 10 runs per alpha with permutation, stored in columns
#   named "<alpha> <perm>" (space-separated).
lmses <- data.frame(row.names = subset)
lmses_perm <- data.frame(row.names = subset)
for (alpha in ALPHAS) {
  lmses[, as.character(alpha)] <- LASSO.D3S_inference_MSE(
    counts, subset, tfs,
    alpha = alpha, N = 200, seed = 121314,
    pwm_occurrence = pwm_occurrence, nCores = 15,
    tf_expression_permutation = FALSE
  )
  for (perm in 1:10) {
    # NOTE(review): every replicate receives the same seed; if `seed` also
    # drives the permutation, the 10 replicates will be identical -- confirm.
    lmses_perm[, paste(as.character(alpha), perm)] <- LASSO.D3S_inference_MSE(
      counts, subset, tfs,
      alpha = alpha, N = 200, seed = 121314,
      pwm_occurrence = pwm_occurrence, nCores = 15,
      tf_expression_permutation = TRUE
    )
  }
}
## 3.333 sec elapsed
## 3.354 sec elapsed
## 3.692 sec elapsed
## 3.287 sec elapsed
## 3.346 sec elapsed
## 3.271 sec elapsed
## 3.43 sec elapsed
## 3.645 sec elapsed
## 3.247 sec elapsed
## 3.376 sec elapsed
## 3.447 sec elapsed
## 3.331 sec elapsed
## 3.247 sec elapsed
## 3.358 sec elapsed
## 3.652 sec elapsed
## 3.268 sec elapsed
## 3.225 sec elapsed
## 3.306 sec elapsed
## 3.812 sec elapsed
## 3.233 sec elapsed
## 3.427 sec elapsed
## 3.624 sec elapsed
## 3.252 sec elapsed
## 3.294 sec elapsed
## 3.169 sec elapsed
## 3.319 sec elapsed
## 3.158 sec elapsed
## 3.324 sec elapsed
## 3.553 sec elapsed
## 3.249 sec elapsed
## 3.305 sec elapsed
## 3.529 sec elapsed
## 3.576 sec elapsed
## 3.164 sec elapsed
## 3.39 sec elapsed
## 3.195 sec elapsed
## 3.15 sec elapsed
## 3.507 sec elapsed
## 3.141 sec elapsed
## 3.259 sec elapsed
## 3.201 sec elapsed
## 3.373 sec elapsed
## 3.062 sec elapsed
## 3.083 sec elapsed
## 2.962 sec elapsed
## 3.102 sec elapsed
## 3.063 sec elapsed
## 3.333 sec elapsed
## 3.414 sec elapsed
## 2.933 sec elapsed
## 3.144 sec elapsed
## 3.213 sec elapsed
## 3.137 sec elapsed
## 3.753 sec elapsed
## 3.079 sec elapsed
## 2.902 sec elapsed
## 2.918 sec elapsed
## 2.819 sec elapsed
## 2.748 sec elapsed
## 2.854 sec elapsed
## 2.913 sec elapsed
## 3.058 sec elapsed
## 3.044 sec elapsed
## 3.191 sec elapsed
## 3.023 sec elapsed
## 3.426 sec elapsed
## 2.865 sec elapsed
## 2.906 sec elapsed
## 2.995 sec elapsed
## 2.909 sec elapsed
## 3.189 sec elapsed
## 2.998 sec elapsed
## 3.199 sec elapsed
## 2.911 sec elapsed
## 3 sec elapsed
## 2.853 sec elapsed
## 2.899 sec elapsed
## 2.959 sec elapsed
## 2.991 sec elapsed
## 2.996 sec elapsed
## 2.91 sec elapsed
## 2.897 sec elapsed
## 2.971 sec elapsed
## 3.131 sec elapsed
## 3.091 sec elapsed
## 2.865 sec elapsed
## 2.766 sec elapsed
## 3.183 sec elapsed
## 3.097 sec elapsed
## 3.468 sec elapsed
## 2.834 sec elapsed
## 3.047 sec elapsed
## 2.829 sec elapsed
## 2.891 sec elapsed
## 2.984 sec elapsed
## 2.913 sec elapsed
## 2.932 sec elapsed
## 2.683 sec elapsed
## 2.801 sec elapsed
## 2.893 sec elapsed
## 3.144 sec elapsed
## 2.95 sec elapsed
## 3.242 sec elapsed
## 2.858 sec elapsed
## 3.008 sec elapsed
## 2.993 sec elapsed
## 2.769 sec elapsed
## 3.11 sec elapsed
## 2.771 sec elapsed
## 2.969 sec elapsed
## 2.891 sec elapsed
## 2.994 sec elapsed
## 2.63 sec elapsed
## 2.997 sec elapsed
## 2.934 sec elapsed
## 2.994 sec elapsed
## 2.929 sec elapsed
## 3.018 sec elapsed
## 2.898 sec elapsed
## 3.002 sec elapsed
## 3.389 sec elapsed
# Print one MSE-vs-alpha plot per sampled gene; print() is required because
# ggplot objects are not auto-displayed inside a for loop.
for (gene in subset) {
  print(draw_gene(gene))
}